Last updated: 2020-01-15 16:44:06

# Metadata ----------------------------------------------------------------

meta <- METAdata %>%
  mutate(sample_GUID = gsub("_C[0-9]$", "", GUID))

# Number of GUIDs with metadata entries
length(unique(meta$GUID))
## [1] 6704
# Number of sample_GUIDs with metadata entries
length(unique(meta$sample_GUID))
## [1] 5900
# Number of sample_GUIDs with metadata entries in each category and
# Proportion of sample_GUIDs with metadata entries in each category
meta %>%
  select(sample_GUID, Category) %>%
  unique() %>%
  group_by(Category) %>%
  summarise(count = n(),
            proportion = n() / nrow(.))
## # A tibble: 3 x 3
##   Category count proportion
##   <chr>    <int>      <dbl>
## 1 animal    2253      0.382
## 2 human     2068      0.351
## 3 other     1579      0.268
# Antibiograms ------------------------------------------------------------

atb <- ATBdata %>%
  mutate(sample_GUID = gsub("_C[0-9]$", "", UNIQUE_SPARK_ID))

# Number of GUIDs with antibiogram profiles
length(unique(atb$UNIQUE_SPARK_ID))
## [1] 3740
# Number of sample_GUIDs with antibiogram profiles
length(unique(atb$sample_GUID))
## [1] 2936
# Number of sample_GUIDs with antibiograms in each category and
# Proportion of sample_GUIDs with antibiogram profiles in each category
meta %>%
  filter(sample_GUID %in% atb$sample_GUID) %>%
  select(sample_GUID, Category) %>%
  unique() %>%
  group_by(Category) %>%
  summarise(count = n(),
            proportion = n() / nrow(.))
## # A tibble: 3 x 3
##   Category count proportion
##   <chr>    <int>      <dbl>
## 1 animal     918      0.313
## 2 human     1430      0.487
## 3 other      588      0.200
# Number of sample_GUIDs with antibiogram profiles in each category and
# Proportion of sample_GUIDs with antibiogram profiles in each category
# relative to the total number of sample_GUIDs with metadata entries
meta %>%
  filter(sample_GUID %in% atb$sample_GUID) %>%
  select(sample_GUID, Category) %>%
  unique() %>%
  group_by(Category) %>%
  summarise(n_atb = n()) %>%
  merge(meta %>%
          select(sample_GUID, Category) %>%
          unique() %>%
          group_by(Category) %>%
          summarise(n_meta = n())) %>%
  mutate(percentage = n_atb / n_meta)
##   Category n_atb n_meta percentage
## 1   animal   918   2253  0.4074567
## 2    human  1430   2068  0.6914894
## 3    other   588   1579  0.3723876
# Distribution of numbers of antibiotics tested (that use MIC values)
atb %>%
  filter(used_MIC == "yes",
         Interpretation != "ND") %>%
  select(sample_GUID, Antibiotic_name) %>%
  unique() %>%
  group_by(sample_GUID) %>%
  summarise(count = n()) %$%
  count %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   26.00   27.00   24.96   27.00   28.00
# Find most common resistance profiles (by GUIDs)
tmp <- atb %>%
  filter(used_MIC == "yes",
         Interpretation != "ND")
antibiotics <- as.character(unique(tmp$Antibiotic_name))

profiles <- tmp %>%
  reshape2::dcast(UNIQUE_SPARK_ID + sample_GUID ~ Antibiotic_name,
                  value.var = "Interpretation") %>%
  mutate_at(vars(one_of(antibiotics)), ~ case_when(.=="R" ~ 1, T ~ 0)) %>%
  mutate(profile = apply(.[antibiotics], 1, function(x)
    paste0(x, collapse = "")))

top_profiles <- profiles %>%
  group_by(profile) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
top_profiles
## # A tibble: 413 x 2
##    profile                      count
##    <chr>                        <int>
##  1 0010000000000000000000000000  1897
##  2 0010000000000010000000000000   368
##  3 0000000000000000000000000000   181
##  4 0010000000000000000100000000   154
##  5 0010000000000000000001000000   105
##  6 0110100000000000000000000000    97
##  7 0110101000100000000000000000    34
##  8 0010000000000010000001000000    30
##  9 0000000000000010000000000000    28
## 10 0010000000000010000100000000    21
## # … with 403 more rows
# Most common antibiotic profiles (note that the 3 most common profile is a
# resistance to nothing)
lapply(1:10, function(x) {
  profiles %>%
    filter(profile == top_profiles$profile[x]) %>%
    .[antibiotics] %>%
    colSums() %>%
    .[. != 0] %>%
    names()
})
## [[1]]
## [1] "Ampicillin"
## 
## [[2]]
## [1] "Ampicillin"     "Fosfomycin_G6P"
## 
## [[3]]
## character(0)
## 
## [[4]]
## [1] "Ampicillin"     "Nitrofurantoin"
## 
## [[5]]
## [1] "Ampicillin"   "Piperacillin"
## 
## [[6]]
## [1] "Amoxicillin_clavulanic_acid" "Ampicillin"                 
## [3] "Cefalexin"                  
## 
## [[7]]
## [1] "Amoxicillin_clavulanic_acid" "Ampicillin"                 
## [3] "Cefalexin"                   "Cefixime"                   
## [5] "Cefuroxime"                 
## 
## [[8]]
## [1] "Ampicillin"     "Fosfomycin_G6P" "Piperacillin"  
## 
## [[9]]
## [1] "Fosfomycin_G6P"
## 
## [[10]]
## [1] "Ampicillin"     "Fosfomycin_G6P" "Nitrofurantoin"
# Sequences ---------------------------------------------------------------

kleb <- KLEBdata %>%
  mutate(sample_GUID = gsub("_C[0-9]$", "", GUID))

# Number of GUIDs with seqences
length(unique(kleb$GUID))
## [1] 3510
# Number of sample_GUIDs with seqences
length(unique(kleb$sample_GUID))
## [1] 2797
# Number of sample_GUIDs with sequences in each category and
# Proportion of sample_GUIDs with sequences in each category
meta %>%
  select(sample_GUID, Category) %>%
  unique() %>%
  filter(sample_GUID %in% kleb$sample_GUID) %>%
  group_by(Category) %>%
  summarise(count = n(),
            proportion = n() / nrow(.))
## # A tibble: 3 x 3
##   Category count proportion
##   <chr>    <int>      <dbl>
## 1 animal     877      0.314
## 2 human     1346      0.481
## 3 other      574      0.205
# Number of sample_GUIDs with sequences from each pathogen and
# Proportion of sample_GUIDs with sequences from each pathogen
kleb %>%
  select(sample_GUID, species) %>%
  unique() %>%
  group_by(species) %>%
  summarise(count = n(),
            proportion = n() / nrow(.)) %>%
  arrange(desc(proportion))
## # A tibble: 17 x 3
##    species                                            count proportion
##    <chr>                                              <int>      <dbl>
##  1 Klebsiella pneumoniae                               1533   0.475   
##  2 Klebsiella michiganensis                             281   0.0871  
##  3 Klebsiella variicola subsp. variicola                265   0.0821  
##  4 Klebsiella oxytoca                                   240   0.0744  
##  5 Klebsiella (Raoultella) ornithinolytica              234   0.0725  
##  6 Klebsiella grimontii                                 176   0.0546  
##  7 Klebsiella aerogenes                                 167   0.0518  
##  8 Klebsiella (Raoultella) planticola                   117   0.0363  
##  9 Klebsiella quasipneumoniae subsp. quasipneumoniae     94   0.0291  
## 10 Klebsiella quasipneumoniae subsp. similipneumoniae    42   0.0130  
## 11 Klebsiella pasteurii                                  26   0.00806 
## 12 Klebsiella (Raoultella) terrigena                     22   0.00682 
## 13 Klebsiella spallanzanii                               10   0.00310 
## 14 Klebsiella huaxiensis                                  8   0.00248 
## 15 Raoultella quasiterrigena                              6   0.00186 
## 16 Klebsiella quasivariicola                              4   0.00124 
## 17 unknown                                                1   0.000310
# Start of modelling section ----------------------------------------------

antibiotic_class <- atb %>%
  filter(used_MIC == "yes") %$%
  Classification %>%
  unique()
antibiotic_class
##  [1] "Aminoglycoside"                "Penicillin Combination"       
##  [3] "Penicillin"                    "Monobactam"                   
##  [5] "Cephalosporin"                 "Fluoroquinolone"              
##  [7] "Colistin"                      "Carbapenem"                   
##  [9] "Fosfomycin"                    "Quinolone"                    
## [11] "Nitrofurantoin"                "Penicillin (Penams)"          
## [13] "Tetracycline"                  "Trimethoprim"                 
## [15] "Trimethoprim/Sulfamethoxazole"
resistances <- lapply(antibiotic_class, function(x) {
  id <- atb %>%
    filter(Classification == x,
           Interpretation == "R") %$%
    UNIQUE_SPARK_ID %>%
    unique()

  data <- meta %>%
    mutate(response = case_when(GUID %in% id ~ "resistant",
                                T ~ "not resistant"))

  data %>%
    group_by(Category, response) %>%
    summarise(Count = n()) %>%
    complete(Category = unique(Category),
             response = c("resistant", "not resistant"),
             fill = list(Count = 0)) %>%
    ungroup() %>%
    mutate(class = x)
}) %>%
  do.call(rbind.data.frame, .)

# Put plots in this order
sort_order <- resistances %>%
  filter(response == "resistant") %>%
  group_by(class) %>%
  summarise(total = sum(Count)) %>%
  arrange(desc(total)) %$%
  class
resistances %<>%
  mutate(class = factor(class, levels = sort_order))

# Plot the number of GUIDs resistant to (red) and not resistant to (grey) each
# antibiotic
resistances %>%
  ggplot() + theme_minimal() + facet_wrap(~class) +
  geom_bar(aes(x = Category, y = Count, fill = response),
           colour = "black", stat = "identity") +
  scale_fill_manual(values = c("grey", "red"))

# Plot the resistance to each antibiotic as a percentage (of GUIDs)
resistances %>%
  group_by(Category, class) %>%
  summarise(percentage = Count[response == "resistant"] / sum(Count)) %>%
  ggplot() + theme_minimal() + facet_wrap(~class) +
  geom_bar(aes(x = Category, y = percentage),
           colour = "black", fill = "red", stat = "identity") +
  labs(title = "Percentage of resistant samples")

# List values used in the above plot (percentage resistant) categorised
# as human, animal, or other
resistances %>%
  group_by(Category, class) %>%
  summarise(percentage = Count[response == "resistant"] / sum(Count)) %>%
  data.frame()
##    Category                         class   percentage
## 1    animal                    Penicillin 0.4385048232
## 2    animal        Penicillin Combination 0.0418006431
## 3    animal                 Cephalosporin 0.0365755627
## 4    animal                    Fosfomycin 0.0643086817
## 5    animal           Penicillin (Penams) 0.0293408360
## 6    animal               Fluoroquinolone 0.0164790997
## 7    animal Trimethoprim/Sulfamethoxazole 0.0136655949
## 8    animal                Aminoglycoside 0.0044212219
## 9    animal                Nitrofurantoin 0.0458199357
## 10   animal                    Carbapenem 0.0000000000
## 11   animal                  Trimethoprim 0.0152733119
## 12   animal                  Tetracycline 0.0036173633
## 13   animal                    Monobactam 0.0064308682
## 14   animal                      Colistin 0.0016077170
## 15   animal                     Quinolone 0.0000000000
## 16    human                    Penicillin 0.6233489561
## 17    human        Penicillin Combination 0.2275244994
## 18    human                 Cephalosporin 0.2202812101
## 19    human                    Fosfomycin 0.1261184491
## 20    human           Penicillin (Penams) 0.1887515978
## 21    human               Fluoroquinolone 0.2011077972
## 22    human Trimethoprim/Sulfamethoxazole 0.1708564124
## 23    human                Aminoglycoside 0.1640391990
## 24    human                Nitrofurantoin 0.0639113762
## 25    human                    Carbapenem 0.0920323818
## 26    human                  Trimethoprim 0.0703025138
## 27    human                  Tetracycline 0.0630592245
## 28    human                    Monobactam 0.0519812527
## 29    human                      Colistin 0.0191734129
## 30    human                     Quinolone 0.0000000000
## 31    other                    Penicillin 0.4456928839
## 32    other        Penicillin Combination 0.0518994114
## 33    other                 Cephalosporin 0.0444087747
## 34    other                    Fosfomycin 0.0882825040
## 35    other           Penicillin (Penams) 0.0406634564
## 36    other               Fluoroquinolone 0.0107009096
## 37    other Trimethoprim/Sulfamethoxazole 0.0069555912
## 38    other                Aminoglycoside 0.0048154093
## 39    other                Nitrofurantoin 0.0347779561
## 40    other                    Carbapenem 0.0032102729
## 41    other                  Trimethoprim 0.0074906367
## 42    other                  Tetracycline 0.0037453184
## 43    other                    Monobactam 0.0042803638
## 44    other                      Colistin 0.0005350455
## 45    other                     Quinolone 0.0000000000
# List percentage resistant (combining all human, animal, and other samples)
resistances %>%
  group_by(response, class) %>%
  summarise(total = sum(Count)) %>%
  group_by(class) %>%
  summarise(percentage = total[response == "resistant"] / sum(total)) %>%
  data.frame()
##                            class  percentage
## 1                     Penicillin 0.505220764
## 2         Penicillin Combination 0.109636038
## 3                  Cephalosporin 0.103072792
## 4                     Fosfomycin 0.092631265
## 5            Penicillin (Penams) 0.088305489
## 6                Fluoroquinolone 0.079504773
## 7  Trimethoprim/Sulfamethoxazole 0.066825776
## 8                 Aminoglycoside 0.060411695
## 9                 Nitrofurantoin 0.049075179
## 10                    Carbapenem 0.033114558
## 11                  Trimethoprim 0.032368735
## 12                  Tetracycline 0.024463007
## 13                    Monobactam 0.021778043
## 14                      Colistin 0.007458234
## 15                     Quinolone 0.000000000
# Penicillin --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=243.08
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     Wild_animal + PATHOGEN
## 
## 
## Step:  AIC=243.08
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     PATHOGEN
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5    210.8  238.8
## - Companion_animal  1    205.8  241.8
## - Livestock         1    207.1  243.1
## <none>                   205.1  243.1
## - PATHOGEN         11   3203.0 3219.0
## 
## Step:  AIC=238.79
## response ~ Livestock + Companion_animal + PATHOGEN
## 
##                    Df Deviance    AIC
## - Companion_animal  1    211.0  237.0
## - Livestock         1    211.5  237.5
## <none>                   210.8  238.8
## - PATHOGEN         11   3398.9 3404.9
## 
## Step:  AIC=237.03
## response ~ Livestock + PATHOGEN
## 
##             Df Deviance    AIC
## - Livestock  1    212.3  236.3
## <none>            211.0  237.0
## - PATHOGEN  11   3410.2 3414.2
## 
## Step:  AIC=236.33
## response ~ PATHOGEN
## 
##            Df Deviance    AIC
## <none>           212.3  236.3
## - PATHOGEN 11   3411.4 3413.4
title <- gsub("response", "Penicillin", deparse(best_model$formula))
apen <- plotROC(df, best_model, title)
apen

# --- Human
df <- get_data("human", "Penicillin")

full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=612.29
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance     AIC
## - AGE_GROUP       9   560.57  600.57
## - SEX             1   554.85  610.85
## - AGE             1   556.15  612.15
## <none>                554.29  612.29
## - Clinical        2   561.14  615.14
## - SPECIFIC_GROUP  5  1147.93 1195.93
## - PATHOGEN       10  2784.35 2822.35
## 
## Step:  AIC=600.57
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE
## 
##                  Df Deviance     AIC
## - SEX             1   561.08  599.08
## <none>                560.57  600.57
## - AGE             1   564.20  602.20
## - Clinical        2   567.35  603.35
## - SPECIFIC_GROUP  5  1174.75 1204.75
## - PATHOGEN       10  2793.18 2813.18
## 
## Step:  AIC=599.08
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE
## 
##                  Df Deviance     AIC
## <none>                561.08  599.08
## - AGE             1   564.59  600.59
## - Clinical        2   567.90  601.90
## - SPECIFIC_GROUP  5  1197.08 1225.08
## - PATHOGEN       10  2793.19 2811.19
best_model$formula
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE
# Adding ward to this as a random effect results in an error

title <- gsub("response", "Penicillin", deparse(best_model$formula))
hpen <- plotROC(df, best_model, title)
hpen

# --- Other
df <- get_data("other", "Penicillin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1906.72
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - Plant_post_harvest  1   1873.2 1905.2
## <none>                    1872.7 1906.7
## - Plant_pre_harvest   1   1876.0 1908.0
## - Other               1   1876.9 1908.9
## - Soil                1   1896.7 1928.7
## - Water               1   1901.0 1933.0
## - Hospital            3   1916.8 1944.8
## - ORIGIN              8   1934.2 1952.2
## 
## Step:  AIC=1905.18
## response ~ Plant_pre_harvest + Water + Soil + Other + Hospital + 
##     ORIGIN
## 
##                     Df Deviance    AIC
## <none>                   1873.2 1905.2
## - Plant_pre_harvest  1   1878.5 1908.5
## - Other              1   1879.7 1909.7
## - Soil               1   1899.3 1929.3
## - Hospital           3   1919.1 1945.1
## - ORIGIN             8   1934.8 1950.8
## - Water              1   1974.1 2004.1
title <- paste(deparse(best_model$formula), collapse = "") %>%
  gsub("response", "Penicillin", .) %>%
  gsub("    ", "", .)
open <- plotROC(df, best_model, title)
open

# Penicillin Combination --------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"),  df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=856.54
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - Livestock         1   842.36 856.36
## <none>                  840.54 856.54
## - Companion_animal  1   843.10 857.10
## - ASSOCIATED_GROUP  5   858.48 864.48
## 
## Step:  AIC=856.36
## response ~ ASSOCIATED_GROUP + Companion_animal
## 
##                    Df Deviance    AIC
## <none>                  842.36 856.36
## - Companion_animal  1   844.88 856.88
## - ASSOCIATED_GROUP  5   859.68 863.68
title <- gsub("response", "PenC", deparse(best_model$formula))
apenc <- plotROC(df, best_model, title)
apenc

# --- Human
df <- get_data("human", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1718.27
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   1660.6 1716.6
## <none>                1660.3 1718.3
## - AGE_GROUP       9   1679.2 1719.2
## - Clinical        2   1677.1 1731.1
## - SEX             1   1676.0 1732.0
## - SPECIFIC_GROUP  5   1716.6 1764.6
## - PATHOGEN       10   2346.5 2384.5
## 
## Step:  AIC=1716.59
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## <none>                1660.6 1716.6
## - AGE_GROUP       9   1689.2 1727.2
## - Clinical        2   1677.4 1729.4
## - SEX             1   1676.3 1730.3
## - SPECIFIC_GROUP  5   1717.6 1763.6
## - PATHOGEN       10   2346.6 2382.6
title <- gsub("response", "PenC", deparse(best_model$formula))
hpenc <- plotROC(df, best_model, title)
hpenc

# --- Other
df <- get_data("other", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=712.14
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - Plant_pre_harvest   1   678.22 710.22
## - Other               1   678.47 710.47
## - Plant_post_harvest  1   678.50 710.50
## - ORIGIN              8   692.95 710.95
## <none>                    678.14 712.14
## - Water               1   681.05 713.05
## - Soil                1   684.72 716.72
## - Hospital            3   691.53 719.53
## 
## Step:  AIC=710.22
## response ~ Plant_post_harvest + Water + Soil + Other + Hospital + 
##     ORIGIN
## 
##                      Df Deviance    AIC
## - Other               1   678.51 708.51
## - Plant_post_harvest  1   678.53 708.53
## - ORIGIN              8   693.26 709.26
## <none>                    678.22 710.22
## - Soil                1   685.07 715.07
## - Hospital            3   694.55 720.55
## - Water               1   694.41 724.41
## 
## Step:  AIC=708.51
## response ~ Plant_post_harvest + Water + Soil + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - Plant_post_harvest  1   678.72 706.72
## - ORIGIN              8   693.36 707.36
## <none>                    678.51 708.51
## - Hospital            3   697.24 721.24
## - Soil                1   695.02 723.02
## - Water               1   726.13 754.13
## 
## Step:  AIC=706.72
## response ~ Water + Soil + Hospital + ORIGIN
## 
##            Df Deviance    AIC
## <none>          678.72 706.72
## - ORIGIN    8   695.81 707.81
## - Hospital  3   697.51 719.51
## - Soil      1   695.04 721.04
## - Water     1   727.18 753.18
title <- gsub("response", "PenC", deparse(best_model$formula))
openc <- plotROC(df, best_model, title)
openc

# Cephalosporin -----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% filter(ASSOCIATED_GROUP != "cheese")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
              binomial(link = "logit"), df)
AIC(model1, model2) # model1 is better
##        df      AIC
## model1  5 760.8327
## model2  3 784.6170
title <- gsub("response", "Ceph", deparse(best_model$formula))
acep <- plotROC(df, model1, title)
acep

# --- Human
df <- get_data("human", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1629.3
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   1571.3 1627.3
## <none>                1571.3 1629.3
## - AGE_GROUP       9   1589.8 1629.8
## - Clinical        2   1586.9 1640.9
## - SEX             1   1597.4 1653.4
## - SPECIFIC_GROUP  5   1638.0 1686.0
## - PATHOGEN       10   2276.0 2314.0
## 
## Step:  AIC=1627.3
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## <none>                1571.3 1627.3
## - Clinical        2   1586.9 1638.9
## - AGE_GROUP       9   1606.0 1644.0
## - SEX             1   1597.5 1651.5
## - SPECIFIC_GROUP  5   1638.2 1684.2
## - PATHOGEN       10   2276.7 2312.7
title <- gsub("response", "Ceph", deparse(best_model$formula))
hcep <- plotROC(df, best_model, title)
hcep

# --- Other
df <- get_data("other", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=638.84
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - Water               1   605.31 637.31
## - Plant_post_harvest  1   606.22 638.22
## - Plant_pre_harvest   1   606.23 638.23
## - Other               1   606.30 638.30
## <none>                    604.84 638.84
## - ORIGIN              8   621.59 639.59
## - Soil                1   609.44 641.44
## - Hospital            3   616.32 644.32
## 
## Step:  AIC=637.31
## response ~ Plant_pre_harvest + Plant_post_harvest + Soil + Other + 
##     Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## <none>                    605.31 637.31
## - ORIGIN              8   622.44 638.44
## - Soil                1   609.66 639.66
## - Plant_post_harvest  1   611.50 641.50
## - Hospital            3   617.59 643.59
## - Plant_pre_harvest   1   618.41 648.41
## - Other               1   634.90 664.90
title <- paste(deparse(best_model$formula), collapse = "") %>%
  gsub("response", "Ceph", .) %>%
  gsub("    ", "", .)
ocep <- plotROC(df, best_model, title)
ocep

# Fosfomycin --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=838.34
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     Wild_animal + PATHOGEN
## 
## 
## Step:  AIC=838.34
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     PATHOGEN
## 
##                    Df Deviance     AIC
## - ASSOCIATED_GROUP  5   808.38  836.38
## <none>                  800.34  838.34
## - Livestock         1   803.22  839.22
## - Companion_animal  1   803.67  839.67
## - PATHOGEN         11  1163.80 1179.80
## 
## Step:  AIC=836.38
## response ~ Livestock + Companion_animal + PATHOGEN
## 
##                    Df Deviance     AIC
## - Livestock         1   809.73  835.73
## - Companion_animal  1   810.06  836.06
## <none>                  808.38  836.38
## - PATHOGEN         11  1179.74 1185.74
## 
## Step:  AIC=835.73
## response ~ Companion_animal + PATHOGEN
## 
##                    Df Deviance     AIC
## - Companion_animal  1   810.36  834.36
## <none>                  809.73  835.73
## - PATHOGEN         11  1185.24 1189.24
## 
## Step:  AIC=834.36
## response ~ PATHOGEN
## 
##            Df Deviance     AIC
## <none>          810.36  834.36
## - PATHOGEN 11  1187.58 1189.58
title <- gsub("response", "Fos", deparse(best_model$formula))
afos <- plotROC(df, best_model, title)
afos

# --- Human
df <- get_data("human", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1513.17
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - SPECIFIC_GROUP  5   1458.6 1506.6
## - AGE_GROUP       9   1466.8 1506.8
## - SEX             1   1455.5 1511.5
## - AGE             1   1456.2 1512.2
## <none>                1455.2 1513.2
## - Clinical        2   1460.1 1514.1
## - PATHOGEN       10   1731.8 1769.8
## 
## Step:  AIC=1506.63
## response ~ Clinical + PATHOGEN + SEX + AGE + AGE_GROUP
## 
##             Df Deviance    AIC
## - AGE_GROUP  9   1470.7 1500.7
## - SEX        1   1459.2 1505.2
## - AGE        1   1459.4 1505.4
## <none>           1458.6 1506.6
## - Clinical   2   1463.6 1507.6
## - PATHOGEN  10   1739.3 1767.3
## 
## Step:  AIC=1500.66
## response ~ Clinical + PATHOGEN + SEX + AGE
## 
##            Df Deviance    AIC
## - SEX       1   1471.0 1499.0
## <none>          1470.7 1500.7
## - Clinical  2   1475.2 1501.2
## - AGE       1   1475.0 1503.0
## - PATHOGEN 10   1754.6 1764.6
## 
## Step:  AIC=1499.05
## response ~ Clinical + PATHOGEN + AGE
## 
##            Df Deviance    AIC
## <none>          1471.0 1499.0
## - Clinical  2   1475.4 1499.4
## - AGE       1   1475.2 1501.2
## - PATHOGEN 10   1755.4 1763.4
title <- gsub("response", "Fos", deparse(best_model$formula))
hfos <- plotROC(df, best_model, title)
hfos

# --- Other
df <- get_data("other", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1069.9
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - Other               1   1037.9 1069.9
## <none>                    1035.9 1069.9
## - ORIGIN              8   1055.2 1073.2
## - Plant_post_harvest  1   1041.9 1073.9
## - Hospital            3   1046.3 1074.3
## - Plant_pre_harvest   1   1043.2 1075.2
## - Soil                1   1043.3 1075.3
## - Water               1   1049.4 1081.4
## 
## Step:  AIC=1069.86
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## <none>                    1037.9 1069.9
## - Hospital            3   1046.3 1072.3
## - ORIGIN              8   1057.3 1073.3
## - Plant_post_harvest  1   1043.3 1073.3
## - Soil                1   1043.7 1073.7
## - Plant_pre_harvest   1   1046.5 1076.5
## - Water               1   1080.9 1110.9
title <- paste(deparse(best_model$formula), collapse = "") %>%
  gsub("response", "Fos", .) %>%
  gsub("    ", "", .)
ofos <- plotROC(df, best_model, title)
ofos

# Penicillin (Penams) -----------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=543.13
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     Wild_animal + PATHOGEN
## 
## 
## Step:  AIC=543.13
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal + 
##     PATHOGEN
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5   510.61 538.61
## - Livestock         1   505.15 541.15
## <none>                  505.13 543.13
## - Companion_animal  1   507.54 543.54
## - PATHOGEN         11   645.58 661.58
## 
## Step:  AIC=538.61
## response ~ Livestock + Companion_animal + PATHOGEN
## 
##                    Df Deviance    AIC
## - Livestock         1   510.67 536.67
## <none>                  510.61 538.61
## - Companion_animal  1   521.15 547.15
## - PATHOGEN         11   648.86 654.86
## 
## Step:  AIC=536.67
## response ~ Companion_animal + PATHOGEN
## 
##                    Df Deviance    AIC
## <none>                  510.67 536.67
## - Companion_animal  1   525.99 549.99
## - PATHOGEN         11   648.98 652.98
title <- gsub("response", "Pen (Pen)", deparse(best_model$formula))
app <- plotROC(df, best_model, title)
app

# --- Human
df <- get_data("human", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1738.59
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE_GROUP       9   1690.8 1730.8
## - AGE             1   1680.6 1736.6
## <none>                1680.6 1738.6
## - SEX             1   1686.1 1742.1
## - Clinical        2   1705.7 1759.7
## - SPECIFIC_GROUP  5   1753.9 1801.9
## - PATHOGEN       10   2162.9 2200.9
## 
## Step:  AIC=1730.81
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE
## 
##                  Df Deviance    AIC
## <none>                1690.8 1730.8
## - AGE             1   1694.8 1732.8
## - SEX             1   1695.8 1733.8
## - Clinical        2   1717.3 1753.3
## - SPECIFIC_GROUP  5   1766.8 1796.8
## - PATHOGEN       10   2173.4 2193.4
title <- gsub("response", "Pen (Pen)", deparse(best_model$formula))
hpp <- plotROC(df, best_model, title)
hpp

# --- Other
df <- get_data("other", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=606.75
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - ORIGIN              8   582.19 600.19
## - Other               1   572.77 604.77
## - Plant_pre_harvest   1   572.78 604.78
## - Plant_post_harvest  1   574.05 606.05
## - Soil                1   574.17 606.17
## <none>                    572.75 606.75
## - Water               1   575.94 607.94
## - Hospital            3   592.55 620.55
## 
## Step:  AIC=600.19
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital
## 
##                      Df Deviance    AIC
## - Other               1   582.20 598.20
## - Plant_pre_harvest   1   582.22 598.22
## - Plant_post_harvest  1   583.71 599.71
## - Soil                1   583.86 599.86
## <none>                    582.19 600.19
## - Water               1   584.82 600.82
## - Hospital            3   604.15 616.15
## 
## Step:  AIC=598.2
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Hospital
## 
##                      Df Deviance    AIC
## - Plant_pre_harvest   1   582.37 596.37
## - Soil                1   584.10 598.10
## <none>                    582.20 598.20
## - Plant_post_harvest  1   586.92 600.92
## - Water               1   604.06 618.06
## - Hospital            3   621.63 631.63
## 
## Step:  AIC=596.37
## response ~ Plant_post_harvest + Water + Soil + Hospital
## 
##                      Df Deviance    AIC
## <none>                    582.37 596.37
## - Soil                1   585.42 597.42
## - Plant_post_harvest  1   588.12 600.12
## - Water               1   607.22 619.22
## - Hospital            3   622.29 630.29
title <- paste(deparse(best_model$formula), collapse = "") %>%
  gsub("response", "Pen (Pen)", .) %>%
  gsub("    ", "", .)
opp <- plotROC(df, best_model, title)
opp

# Fluoroquinolone ---------------------------------------------------------
# --- Animal
df <- get_data("animal", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=419.79
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5   405.91 411.91
## - Livestock         1   404.99 418.99
## - Companion_animal  1   405.03 419.03
## <none>                  403.79 419.79
## 
## Step:  AIC=411.91
## response ~ Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## <none>                  405.91 411.91
## - Companion_animal  1   415.23 419.23
## - Livestock         1   417.19 421.19
title <- gsub("response", "Flu", deparse(best_model$formula))
aflu <- plotROC(df, best_model, title)
aflu

# --- Human
df <- get_data("human", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1605.05
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   1547.0 1603.0
## - AGE_GROUP       9   1563.3 1603.3
## <none>                1547.0 1605.0
## - SEX             1   1560.0 1616.0
## - Clinical        2   1562.2 1616.2
## - SPECIFIC_GROUP  5   1628.3 1676.3
## - PATHOGEN       10   2129.1 2167.1
## 
## Step:  AIC=1603.05
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## <none>                1547.0 1603.0
## - AGE_GROUP       9   1574.2 1612.2
## - SEX             1   1560.0 1614.0
## - Clinical        2   1562.2 1614.2
## - SPECIFIC_GROUP  5   1628.7 1674.7
## - PATHOGEN       10   2129.3 2165.3
title <- gsub("response", "Flu", deparse(best_model$formula))
hflu <- plotROC(df, best_model, title)
hflu

# --- Other
df <- get_data("other", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1849   20
# Trimethoprim/Sulfamethoxazole -------------------------------------------
# --- Animal
df <- get_data("animal", "Trimethoprim/Sulfamethoxazole")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=356.51
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5   343.78 349.78
## - Livestock         1   341.35 355.35
## - Companion_animal  1   341.43 355.43
## <none>                  340.51 356.51
## 
## Step:  AIC=349.78
## response ~ Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## <none>                  343.78 349.78
## - Companion_animal  1   357.03 361.03
## - Livestock         1   358.24 362.24
title <- gsub("response", "Tri/Sul", deparse(best_model$formula))
ats <- plotROC(df, best_model, title)
ats

# --- Human
df <- get_data("human", "Trimethoprim/Sulfamethoxazole")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=2008.93
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   1971.2 2007.2
## <none>                1970.9 2008.9
## - Clinical        2   1976.7 2010.7
## - SEX             1   1977.8 2013.8
## - AGE_GROUP       9   1994.1 2014.1
## - SPECIFIC_GROUP  5   2061.7 2089.7
## 
## Step:  AIC=2007.22
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## <none>                1971.2 2007.2
## - Clinical        2   1976.9 2008.9
## - SEX             1   1978.1 2012.1
## - AGE_GROUP       9   2005.0 2023.0
## - SPECIFIC_GROUP  5   2061.7 2087.7
title <- gsub("response", "Tri/Sul", deparse(best_model$formula))
hts <- plotROC(df, best_model, title)
hts

# --- Other
df <- get_data("other", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1849   20
# Aminoglycoside ----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Aminoglycoside")

full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
              binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
##        df      AIC
## model1  6 144.5713
## model2  3 130.3123
title <- gsub("response", "Amin", deparse(best_model$formula))
aam <- plotROC(df, model2, title)
aam

# --- Human
df <- get_data("human", "Aminoglycoside")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1914.63
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   1876.6 1912.6
## <none>                1876.6 1914.6
## - Clinical        2   1881.1 1915.1
## - AGE_GROUP       9   1895.5 1915.5
## - SEX             1   1892.7 1928.7
## - SPECIFIC_GROUP  5   1984.2 2012.2
## 
## Step:  AIC=1912.63
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## <none>                1876.6 1912.6
## - Clinical        2   1881.1 1913.1
## - SEX             1   1892.7 1926.7
## - AGE_GROUP       9   1929.2 1947.2
## - SPECIFIC_GROUP  5   1984.5 2010.5
title <- gsub("response", "Amin", deparse(best_model$formula))
ham <- plotROC(df, best_model, title)
ham

# --- Other
df <- get_data("other", "Aminoglycoside")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1860    9
# Nitrofurantoin ----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=888.09
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - Companion_animal  1   873.56 887.56
## <none>                  872.09 888.09
## - Livestock         1   874.72 888.72
## - ASSOCIATED_GROUP  5   899.58 905.58
## 
## Step:  AIC=887.56
## response ~ ASSOCIATED_GROUP + Livestock
## 
##                    Df Deviance    AIC
## <none>                  873.56 887.56
## - Livestock         1   879.72 891.72
## - ASSOCIATED_GROUP  5   906.05 910.05
title <- gsub("response", "Nitr", deparse(best_model$formula))
anit <- plotROC(df, best_model, title)
anit

# --- Human
df <- get_data("human", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1130.35
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE_GROUP       9   1097.8 1117.8
## - Clinical        2   1093.0 1127.0
## - SPECIFIC_GROUP  5   1101.5 1129.5
## - AGE             1   1094.0 1130.0
## - SEX             1   1094.2 1130.2
## <none>                1092.3 1130.3
## 
## Step:  AIC=1117.77
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
## 
##                  Df Deviance    AIC
## - Clinical        2   1098.5 1114.5
## - SPECIFIC_GROUP  5   1106.7 1116.7
## - SEX             1   1099.6 1117.6
## <none>                1097.8 1117.8
## - AGE             1   1102.9 1120.9
## 
## Step:  AIC=1114.47
## response ~ SPECIFIC_GROUP + SEX + AGE
## 
##                  Df Deviance    AIC
## - SEX             1   1100.4 1114.4
## <none>                1098.5 1114.5
## - SPECIFIC_GROUP  5   1109.5 1115.5
## - AGE             1   1103.9 1117.9
## 
## Step:  AIC=1114.42
## response ~ SPECIFIC_GROUP + AGE
## 
##                  Df Deviance    AIC
## <none>                1100.4 1114.4
## - SPECIFIC_GROUP  5   1111.3 1115.3
## - AGE             1   1105.6 1117.6
title <- gsub("response", "Nitr", deparse(best_model$formula))
hnit <- plotROC(df, best_model, title)
hnit

# --- Other
df <- get_data("other", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=551.53
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital + ORIGIN
## 
##                      Df Deviance    AIC
## - ORIGIN              8   525.83 543.83
## - Plant_post_harvest  1   517.69 549.69
## - Plant_pre_harvest   1   518.05 550.05
## - Other               1   518.38 550.38
## - Water               1   519.45 551.45
## <none>                    517.53 551.53
## - Soil                1   520.74 552.74
## - Hospital            3   533.06 561.06
## 
## Step:  AIC=543.83
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil + 
##     Other + Hospital
## 
##                      Df Deviance    AIC
## - Plant_post_harvest  1   525.83 541.83
## - Plant_pre_harvest   1   526.22 542.22
## - Other               1   527.39 543.39
## <none>                    525.83 543.83
## - Water               1   528.74 544.74
## - Soil                1   529.61 545.61
## - Hospital            3   544.43 556.43
## 
## Step:  AIC=541.83
## response ~ Plant_pre_harvest + Water + Soil + Other + Hospital
## 
##                     Df Deviance    AIC
## - Plant_pre_harvest  1   527.05 541.05
## <none>                   525.83 541.83
## - Other              1   528.93 542.93
## - Soil               1   529.62 543.62
## - Water              1   532.48 546.48
## - Hospital           3   551.98 561.98
## 
## Step:  AIC=541.05
## response ~ Water + Soil + Other + Hospital
## 
##            Df Deviance    AIC
## <none>          527.05 541.05
## - Soil      1   530.65 542.65
## - Other     1   533.58 545.58
## - Water     1   540.77 552.77
## - Hospital  3   556.79 564.79
title <- gsub("response", "Nitr", deparse(best_model$formula))
onit <- plotROC(df, best_model, title)
onit

# Carbapenem --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Carbapenem")
table(df$response)
## 
##    0 
## 2488
# --- Human
df <- get_data("human", "Carbapenem")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-AGE_GROUP)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1353.52
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
## 
##                  Df Deviance    AIC
## <none>                1333.5 1353.5
## - SEX             1   1340.8 1358.8
## - Clinical        2   1353.3 1369.3
## - AGE             1   1359.7 1377.7
## - SPECIFIC_GROUP  5   1369.9 1379.9
deparse(best_model$formula)
## [1] "response ~ Clinical + SPECIFIC_GROUP + SEX + AGE"
model2 <-  glmer(response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + (1|HOSPITAL_WARD),
                 df, binomial(link = "logit"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 1
## negative eigenvalues
# summary(allFit(model2))

title <- gsub("response", "Carb", deparse(best_model$formula))
hcar <- plotROC(df, best_model, title)
hcar

# --- Other
df <- get_data("other", "Carbapenem")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1863    6
# Trimethoprim ------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=389.51
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5   375.71 381.71
## - Livestock         1   374.54 388.54
## - Companion_animal  1   374.64 388.64
## <none>                  373.51 389.51
## 
## Step:  AIC=381.71
## response ~ Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## <none>                  375.71 381.71
## - Companion_animal  1   390.44 394.44
## - Livestock         1   391.92 395.92
title <- gsub("response", "Tri", deparse(best_model$formula))
atri <- plotROC(df, best_model, title)
atri

# --- Human
df <- get_data("human", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=1048.97
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE             1   990.98 1047.0
## - SEX             1   991.01 1047.0
## - AGE_GROUP       9  1008.04 1048.0
## - Clinical        2   994.48 1048.5
## <none>                990.97 1049.0
## - SPECIFIC_GROUP  5  1001.29 1049.3
## - PATHOGEN       10  1159.52 1197.5
## 
## Step:  AIC=1046.98
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
## 
##                  Df Deviance    AIC
## - SEX             1   991.02 1045.0
## - AGE_GROUP       9  1008.08 1046.1
## - Clinical        2   994.48 1046.5
## <none>                990.98 1047.0
## - SPECIFIC_GROUP  5  1001.29 1047.3
## - PATHOGEN       10  1159.66 1195.7
## 
## Step:  AIC=1045.02
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE_GROUP       9  1008.09 1044.1
## - Clinical        2   994.58 1044.6
## <none>                991.02 1045.0
## - SPECIFIC_GROUP  5  1001.34 1045.3
## - PATHOGEN       10  1159.73 1193.7
## 
## Step:  AIC=1044.09
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN
## 
##                  Df Deviance    AIC
## - Clinical        2   1011.9 1043.9
## <none>                1008.1 1044.1
## - SPECIFIC_GROUP  5   1019.4 1045.4
## - PATHOGEN       10   1176.0 1192.0
## 
## Step:  AIC=1043.93
## response ~ SPECIFIC_GROUP + PATHOGEN
## 
##                  Df Deviance    AIC
## <none>                1011.9 1043.9
## - SPECIFIC_GROUP  5   1027.2 1049.2
## - PATHOGEN       10   1177.6 1189.6
title <- gsub("response", "Tri", deparse(best_model$formula))
htri <- plotROC(df, best_model, title)
htri

# --- Other
df <- get_data("other", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1855   14
# Tetracycline ------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
              binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
##        df      AIC
## model1  6 124.0903
## model2  3 116.2254
title <- gsub("response", "Tetr", deparse(best_model$formula))
atet <- plotROC(df, model2, title)
atet

# --- Human
df <- get_data("human", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=867.33
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE + 
##     AGE_GROUP
## 
##                  Df Deviance     AIC
## <none>                809.33  867.33
## - AGE             1   811.53  867.53
## - SEX             1   812.09  868.09
## - AGE_GROUP       9   836.14  876.14
## - SPECIFIC_GROUP  5   830.34  878.34
## - Clinical        2   830.20  884.20
## - PATHOGEN       10  1031.02 1069.02
title <- gsub("response", "Tetr", deparse(best_model$formula))
htet <- plotROC(df, best_model, title)
htet

# --- Other
df <- get_data("other", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1862    7
# Monobactam --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=192.26
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - ASSOCIATED_GROUP  5   181.75 187.75
## - Livestock         1   176.40 190.40
## - Companion_animal  1   176.91 190.91
## <none>                  176.26 192.26
## 
## Step:  AIC=187.75
## response ~ Livestock + Companion_animal
## 
##                    Df Deviance    AIC
## - Livestock         1   182.23 186.23
## <none>                  181.75 187.75
## - Companion_animal  1   189.79 193.79
## 
## Step:  AIC=186.23
## response ~ Companion_animal
## 
##                    Df Deviance    AIC
## <none>                  182.23 186.23
## - Companion_animal  1   193.39 195.39
title <- gsub("response", "Mono", deparse(best_model$formula))
amon <- plotROC(df, best_model, title)
amon

# --- Human
df <- get_data("human", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=950.74
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
## 
##                  Df Deviance    AIC
## - AGE_GROUP       9   923.73 943.73
## - AGE             1   912.83 948.83
## - SEX             1   914.24 950.24
## <none>                912.74 950.74
## - Clinical        2   919.04 953.04
## - SPECIFIC_GROUP  5   926.15 954.15
## 
## Step:  AIC=943.73
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
## 
##                  Df Deviance    AIC
## - SEX             1   925.38 943.38
## <none>                923.73 943.73
## - Clinical        2   931.48 947.48
## - AGE             1   929.61 947.61
## - SPECIFIC_GROUP  5   938.08 948.08
## 
## Step:  AIC=943.38
## response ~ Clinical + SPECIFIC_GROUP + AGE
## 
##                  Df Deviance    AIC
## <none>                925.38 943.38
## - AGE             1   930.97 946.97
## - Clinical        2   934.02 948.02
## - SPECIFIC_GROUP  5   940.93 948.93
title <- gsub("response", "Mono", deparse(best_model$formula))
hmon <- plotROC(df, best_model, title)
hmon

# --- Other
df <- get_data("other", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1861    8
# Colistin ----------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
              binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
##        df      AIC
## model1  6 68.31727
## model2  3 61.48973
title <- gsub("response", "Col", deparse(best_model$formula))
acol <- plotROC(df, model2, title)
acol

# --- Human
df <- get_data("human", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-AGE_GROUP)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
                    select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start:  AIC=423.37
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
## 
##                  Df Deviance    AIC
## - SEX             1   403.40 421.40
## - AGE             1   404.33 422.33
## <none>                403.37 423.37
## - SPECIFIC_GROUP  5   414.66 424.66
## - Clinical        2   422.97 438.97
## 
## Step:  AIC=421.4
## response ~ Clinical + SPECIFIC_GROUP + AGE
## 
##                  Df Deviance    AIC
## - AGE             1   404.38 420.38
## <none>                403.40 421.40
## - SPECIFIC_GROUP  5   414.66 422.66
## - Clinical        2   423.26 437.26
## 
## Step:  AIC=420.38
## response ~ Clinical + SPECIFIC_GROUP
## 
##                  Df Deviance    AIC
## <none>                404.38 420.38
## - SPECIFIC_GROUP  5   414.86 420.86
## - Clinical        2   425.34 437.34
title <- gsub("response", "Col", deparse(best_model$formula))
hcol <- plotROC(df, best_model, title)
hcol

# --- Other
df <- get_data("other", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
## 
##    0    1 
## 1868    1
# animals <- cowplot::plot_grid(aam, acep, acol, aflu, afos, amon, anit, apen,
#                               apenc, app, atet, atri, ats, align = "vh",
#                               axis = "tb", ncol = 2, labels = letters[1:13])
# ggsave("animals.pdf", animals, width = 12, height = 24)
#
# humans <- cowplot::plot_grid(ham, hcar, hcep, hcol, hflu, hfos, hmon, hnit,
#                              hpen, hpenc, hpp, htet, htri, hts, align = "vh",
#                              axis = "tb", ncol = 2, labels = letters[1:14])
# ggsave("humans.pdf", humans, width = 12, height = 24)
#
# others <- cowplot::plot_grid(ocep, ofos, onit, open, openc, opp, align = "vh",
#                              axis = "tb", ncol = 2, labels = letters[1:6])
# ggsave("others.pdf", others, width = 12, height = 10)